
* Load the merged caregiver survey / IDELA baseline + endline data.
use "CGsurveyandIDELA_baseline_endline.dta", clear

**OUTCOMES**

*caregiver wellbeing*
* Item lists are held in locals so each list is written once per wave.
* (Locals only exist while this do-file section runs as a unit.)
local phq_items_el phq8_1_el phq8_2_el phq8_3_el phq8_4_el phq8_5_el phq8_6_el phq8_7_el phq8_8_el
local gad_items_el gad1_el gad2_el gad3_el gad4_el gad5_el gad6_el gad7_el

codebook `phq_items_el' `gad_items_el'

* -99 is recoded to system missing before any scale work
mvdecode `phq_items_el' `gad_items_el', mv(-99)

* reliability check only (no variable created)
alpha `phq_items_el', item
*.90
alpha `gad_items_el', item
*.93

* same scales again, this time saving the scale scores
alpha `phq_items_el', item gen(PHQ_el)
alpha `gad_items_el', item gen(GAD_el)

	*and at baseline*
	local phq_items_bl phq8_1_bl phq8_2_bl phq8_3_bl phq8_4_bl phq8_5_bl phq8_6_bl phq8_7_bl phq8_8_bl
	local gad_items_bl gad1_bl gad2_bl gad3_bl gad4_bl gad5_bl gad6_bl gad7_bl

	codebook `phq_items_bl' `gad_items_bl'

	* -99 is recoded to system missing, as at endline
	mvdecode `phq_items_bl' `gad_items_bl', mv(-99)

	alpha `phq_items_bl', item gen(PHQ_bl)
	*0.83
	alpha `gad_items_bl', item gen(GAD_bl)
	*0.90

*parenting self-efficacy*
* Inspect the 15 endline parenting self-efficacy items (no item 3 in this list).
codebook pse1_di_el pse2_di_el pse4_di_el pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el

* recode -99 to system missing
foreach var of varlist pse1_di_el pse2_di_el pse4_di_el pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el {
	replace `var'=. if `var'==-99
	}

* User-written commands needed for the runmplus calls that follow.
* Install from SSC only when the command is not already on the adopath, so that
* re-running this do-file does not require a network connection every time.
foreach pkg in runmplus lstrfun findname strparse {
	capture which `pkg'
	if _rc {
		ssc install `pkg'
	}
}

* Exploratory factor analysis of the 15 endline parenting self-efficacy items,
* run in Mplus through runmplus; type (efa 1 5) requests 1- to 5-factor solutions.
runmplus pse1_di_el pse2_di_el pse4_di_el pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, ///
type (efa 1 5) ///
missing =. 
* item groupings noted from the EFA output:
*1 2 4; 5-9; 10 13-16; 10-13
codebook pse5_di_el
*pse5: generally think do great job in encouraging and supporting kinds of behavior you want child to have
*so discipline but worded very differently than the others

* Four-factor CFA following the EFA groupings; items 10 and 13 load on two factors.
runmplus pse1_di_el pse2_di_el pse4_di_el pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, ///
idvariable (hhid) ///
model (psedis_el by pse1_di_el pse2_di_el pse4_di_el; ///
psenur_el by pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el; ///
pseply_el by pse10_ca_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el;  ///
pseeng_el by pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
* fit statistics noted from the output (presumably RMSEA, CFI, TLI, SRMR - TODO confirm order)
*.065 .965 .955 .038

* Same four-factor CFA, adding a residual covariance between items 5 and 6.
runmplus pse1_di_el pse2_di_el pse4_di_el pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, ///
idvariable (hhid) ///
model (psedis_el by pse1_di_el pse2_di_el pse4_di_el; ///
psenur_el by pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el; ///
pseply_el by pse10_ca_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el;  ///
pseeng_el by pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el; ///
pse5_di_el with pse6_ca_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
*.056 .974 .966 .037
*but 10 really does not load on play (and 13 only sort of loads on engagement)
*and play and engaged quite correlated 0.776

* Three-factor CFA: the play and engagement items (10-16) combined into one factor.
runmplus pse1_di_el pse2_di_el pse4_di_el pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, ///
idvariable (hhid) ///
model (psedis_el by pse1_di_el pse2_di_el pse4_di_el; ///
psenur_el by pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el; ///
pseply_el by pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
*this is definitely worse fit though
*loadings all seem similar, so if we get to a fit we like we will be looking at alphas; let's compare

* Cronbach's alpha for each candidate endline subscale (item option shows
* item-level statistics); nothing is saved here, this is comparison only.

* discipline items
alpha pse1_di_el pse2_di_el pse4_di_el, item
* nurture/care items including item 5
alpha pse5_di_el pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el, item
*better without 5
* item 5 tried with the discipline items instead
alpha pse1_di_el pse2_di_el pse4_di_el pse5_di_el, item
*same
* play items from the four-factor model
alpha pse10_ca_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, item
*huh, despite low factor loadings this is better with 10, better without 14 (though very similar with or without either)
* engagement items from the four-factor model
* (variable name spelled out in full: the original pse13_pl_e only resolved
*  through variable abbreviation and breaks under set varabbrev off)
alpha pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el, item
*best with all four here too

* play and engagement items combined
alpha pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, item
*but still think they could be all together (and looks like 14 could go or stay)

* Three-factor CFA with item 5 dropped.
runmplus pse1_di_el pse2_di_el pse4_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, ///
idvariable (hhid) ///
model (psedis_el by pse1_di_el pse2_di_el pse4_di_el; ///
psenur_el by pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el; ///
pseply_el by pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
* fit statistics noted from the output (presumably RMSEA, CFI, TLI, SRMR - TODO confirm order)
*.094 .929 .913 .054

* Same model with item 14 also dropped.
runmplus pse1_di_el pse2_di_el pse4_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el  pse15_pl_el pse16_pl_el, ///
idvariable (hhid) ///
model (psedis_el by pse1_di_el pse2_di_el pse4_di_el; ///
psenur_el by pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el; ///
pseply_el by pse10_ca_el pse11_ca_el pse12_pl_el pse13_pl_el  pse15_pl_el pse16_pl_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
*worse

*biggest modindices with all of these is that everything wants its own correlation with 11
* Three-factor CFA with items 5 and 11 dropped (item 14 back in).
runmplus pse1_di_el pse2_di_el pse4_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el  pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, ///
idvariable (hhid) ///
model (psedis_el by pse1_di_el pse2_di_el pse4_di_el; ///
psenur_el by pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el; ///
pseply_el by pse10_ca_el  pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
*better dropping 11
*.077 .956 .945 .043
*still better as 4 factors but splitting those play items does not make sense in terms of what they say/ask
*and those subscales SUPER correlated

* factor correlations pasted from the Mplus output:
*PSENUR_E WITH
*    PSEDIS_EL         -0.036      0.030     -1.206      0.228

* PSEPLY_E WITH
*    PSEDIS_EL         -0.078      0.029     -2.677      0.007
*    PSENUR_EL          0.443      0.023     18.951      0.000



* Same model plus residual covariances for items 12 with 13 and 15 with 16.
runmplus pse1_di_el pse2_di_el pse4_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el  pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, ///
idvariable (hhid) ///
model (psedis_el by pse1_di_el pse2_di_el pse4_di_el; ///
psenur_el by pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el; ///
pseply_el by pse10_ca_el  pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el; ///
pse12_pl_el with pse13_pl_el; ///
pse15_pl_el with pse16_pl_el) ///
missing =. ///
output (STDYX modindices(ALL))
*.060 .974 .966 .038
*loadings all super similar so can go with mean scores

* factor correlations pasted from the Mplus output:
*PSENUR_E WITH
*    PSEDIS_EL         -0.036      0.030     -1.206      0.228

* PSEPLY_E WITH
*    PSEDIS_EL         -0.076      0.030     -2.562      0.010
*    PSENUR_EL          0.462      0.024     19.618      0.000



* Same model again, this time declaring every item as categorical.
runmplus pse1_di_el pse2_di_el pse4_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el  pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, ///
idvariable (hhid) ///
categorical (pse1_di_el pse2_di_el pse4_di_el pse6_ca_el pse7_ca_el pse8_ca_el ///
pse9_ca_el pse10_ca_el  pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el) ///
model (psedis_el by pse1_di_el pse2_di_el pse4_di_el; ///
psenur_el by pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el; ///
pseply_el by pse10_ca_el  pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el; ///
pse12_pl_el with pse13_pl_el; ///
pse15_pl_el with pse16_pl_el) ///
missing =. ///
output (STDYX modindices(ALL))
*not quite as good a fit but similar results (unclear with 4 categories which way it should be run)
*but both suggest mean scores work

*would prefer slightly better fit (all sorts of extra correlations model wants but points to mean score at end regardless)

****DROPPING 5 and 11****
* Final endline subscales; gen() saves each scale score as a new variable.
* discipline
alpha pse1_di_el pse2_di_el pse4_di_el, item gen(psedis_el)
*0.85
* nurture/care
alpha pse6_ca_el pse7_ca_el pse8_ca_el pse9_ca_el, item gen(psenur_el)
*0.89
* play
alpha pse10_ca_el pse12_pl_el pse13_pl_el pse14_pl_el pse15_pl_el pse16_pl_el, item gen(pseply_el)
*0.91

	*and at baseline*
	* all 16 baseline items (baseline includes item 3), listed once
	local pse_items_bl pse1_di_bl pse2_di_bl pse3_di_bl pse4_di_bl pse5_di_bl pse6_ca_bl pse7_ca_bl pse8_ca_bl ///
	pse9_ca_bl pse10_ca_bl pse11_ca_bl pse12_pl_bl pse13_pl_bl pse14_pl_bl pse15_pl_bl pse16_pl_bl

	codebook `pse_items_bl'

	* -99 recoded to system missing
	mvdecode `pse_items_bl', mv(-99)

	* same three subscales as at endline
	alpha pse1_di_bl pse2_di_bl pse4_di_bl, item gen(psedis_bl)
	*0.73
	alpha pse6_ca_bl pse7_ca_bl pse8_ca_bl pse9_ca_bl, item gen(psenur_bl)
	*0.87
	alpha pse10_ca_bl pse12_pl_bl pse13_pl_bl pse14_pl_bl pse15_pl_bl pse16_pl_bl, item gen(pseply_bl)
	*0.81

*parenting stress*
* Inspect the seven endline parenting stress items, then recode -99 to system missing.
codebook pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el
foreach var of varlist pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el {
	replace `var'=. if `var'==-99
	}
	
* EFA in Mplus via runmplus, requesting 1- to 3-factor solutions.
runmplus pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el, ///
type (efa 1 3) ///
missing =. 
*TWO FACTOR SOLUTION: 9 11 13 (with 9 carrying the factor); 10-16
*so 9 is really doing something different 
*and has the most almost nevers
*three factor has two nothing factors and one with 9 AND 11 dropped

* Cronbach's alpha for the full set and for subsets dropping 9 and/or 11.
alpha pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el, item
*alpha likes all of them though
*.9029
alpha  pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el, item
*.8915
alpha  pss_10_el pss_12_el pss_13_el pss_15_el pss_16_el, item
*.8938
alpha pss_9_el pss_10_el pss_12_el pss_13_el pss_15_el pss_16_el, item
*.9006

* Fit a graded response model and a partial credit model, store both, and compare.
* NOTE(review): GRM and PCM do not look like nested models, so the likelihood-ratio
* test below may not be a valid comparison - confirm, or compare AIC/BIC instead.
irt grm pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el
est sto grm_pss_el

irt pcm pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el
est sto pcm_pss_el

lrtest grm_pss_el pcm_pss_el
*should use grm

* Refit the GRM so the irtgraph commands below use it (the last fit was the PCM).
irt grm pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el
* item information functions
irtgraph iif
*11 telling us the least, 9 and 13 next and give us same info
* category characteristic curves for the three weakest items
irtgraph icc pss_9_el
irtgraph icc pss_11_el
irtgraph icc pss_13_el
*but they work, just not adding a ton

* One-factor CFA with all seven items.
runmplus pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el, ///
idvariable (hhid) ///
model (pss_el by pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
*mean scores definitely more indicated with 9, 11, 13 dropped (as they all have lower loadings)

* One-factor CFA with items 9, 11 and 13 dropped.
runmplus  pss_10_el  pss_12_el  pss_15_el pss_16_el, ///
idvariable (hhid) ///
model (pss_el by  pss_10_el  pss_12_el  pss_15_el pss_16_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
*and fit way better, RMSEA goes from .134 to .075 (.995 .986 .009)

* Two-factor CFA: items 10, 12, 15, 16 on one factor and 9, 11, 13 on the other.
runmplus pss_9_el pss_10_el pss_11_el pss_12_el pss_13_el pss_15_el pss_16_el, ///
idvariable (hhid) ///
model (pss1_el by pss_10_el  pss_12_el  pss_15_el pss_16_el; ///
pss2_el by pss_9_el pss_11_el pss_13_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
*.078 .980 .968 .021
*this fits nearly as well but SUPER highly correlated (0.833)
*model wants 9 and 10 to correlate

* alpha for the four-item subset chosen for now
alpha  pss_10_el pss_12_el pss_15_el pss_16_el, item
*.8964
*let's just do this subset for now and revisit

* Endline parenting stress score from the four retained items.
local pss_keep_el pss_10_el pss_12_el pss_15_el pss_16_el
alpha `pss_keep_el', item gen(pss_el)

	*and at baseline*
	* every baseline item inspected and cleaned here, listed once
	local pss_all_bl pss_3_bl m_e_oth20_bl pss_4_bl pss_9_bl pss_10_bl pss_11_bl pss_12_bl pss_13_bl pss_15_bl m_e_oth23_bl pss_16_bl
	codebook `pss_all_bl'
	* -99 recoded to system missing
	mvdecode `pss_all_bl', mv(-99)

	* baseline score uses the same four items as endline
	local pss_keep_bl pss_10_bl pss_12_bl pss_15_bl pss_16_bl
	alpha `pss_keep_bl', item gen(pss_bl)
	*.8298
	

*parenting discipline*
* six endline discipline items, listed once
local disc_items_el p_gha_1j_el p_gha_1b_el p_gha_1e_el p_gha_1f_el p_gha_1l_el p_gha_1m_el
codebook `disc_items_el'
* -99 recoded to system missing
mvdecode `disc_items_el', mv(-99)

* EFA in Mplus via runmplus, 1- to 3-factor solutions (call left exactly as written)
runmplus p_gha_1j_el p_gha_1b_el p_gha_1e_el p_gha_1f_el p_gha_1l_el p_gha_1m_el, ///
type (efa 1 3) ///
missing =. 
*only 1 factor solution loads, with 1F and 1M not loading
*so loading as JUST a positive scale

* reliability with all six items, then without 1f and 1m
alpha `disc_items_el', item
*.55
alpha p_gha_1j_el p_gha_1b_el p_gha_1e_el p_gha_1l_el, item
*.66
	
	*baseline*
	local disc_items_bl p_gha_1j_bl p_gha_1b_bl p_gha_1e_bl p_gha_1f_bl p_gha_1l_bl p_gha_1m_bl
	codebook `disc_items_bl'
	* -99 recoded to system missing
	mvdecode `disc_items_bl', mv(-99)
	
	* same four-item subset as at endline; no scale variable is saved
	alpha p_gha_1j_bl p_gha_1b_bl p_gha_1e_bl p_gha_1l_bl, item
	*.52

	
*reported caregiver-child interactions*
* Three parallel lists, position k in each belonging to the same activity:
*   gates - the cg_envK_el item; a value of 0 forces the activity score to 0
*   freqs - the matching "a" follow-up item (item 10 is spelled cgenv10a_el)
*   acts  - short stub for the new activity variable
local gates cg_env1_el cg_env2_el cg_env3_el cg_env4_el cg_env5_el cg_env6_el cg_env7_el cg_env8_el cg_env9_el cg_env10_el
local freqs cg_env1a_el cg_env2a_el cg_env3a_el cg_env4a_el cg_env5a_el cg_env6a_el cg_env7a_el cg_env8a_el cg_env9a_el cgenv10a_el
local acts read tell sing play name coun draw chor reli meal
local n_acts : word count `acts'

* interleave gate and follow-up items in questionnaire order
local env_items
forvalues k = 1/`n_acts' {
	local gate : word `k' of `gates'
	local freq : word `k' of `freqs'
	local env_items `env_items' `gate' `freq'
}

codebook `env_items'

* -99 recoded to system missing
mvdecode `env_items', mv(-99)

* activity score = follow-up value, overwritten with 0 when the gate item is 0
forvalues k = 1/`n_acts' {
	local act  : word `k' of `acts'
	local gate : word `k' of `gates'
	local freq : word `k' of `freqs'
	gen `act'_el=`freq'
	replace `act'_el=0 if `gate'==0
}

* EFA of the ten endline activity scores in Mplus via runmplus, 1- to 5-factor solutions.
runmplus read_el tell_el sing_el play_el name_el coun_el draw_el chor_el reli_el meal_el, ///
type (efa 1 5) ///
missing =.
*4 factor is best fit that still is sort of coherent but not really 4 factors
*reading more or less stays on own (weak double loading of telling stories)
*telling stories, singing, playing, naming things, drawing together
*counting on own, weirdly
*chores, religion, sitting with at meals own too

*for 3 factor reading and stories together; counting joins that other naming etc group; other three on own
*singing and playing weakly load on reading/telling stories

* Three-factor CFA based on the 3-factor EFA grouping.
runmplus read_el tell_el sing_el play_el name_el coun_el draw_el chor_el reli_el meal_el, ///
idvariable (hhid) ///
model (story_el by read_el tell_el; ///
funpl_el by sing_el play_el name_el coun_el draw_el; ///
other_el by chor_el reli_el meal_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
* fit statistics noted from the output (presumably RMSEA, CFI, TLI, SRMR - TODO confirm
* order; the last value .0032 may be a typo for .032 - verify against the Mplus output)
*.072 .956 .938 .0032
*loadings on other are weaker aside from chores

* Two-factor CFA without the chores / religion / meals items.
runmplus read_el tell_el sing_el play_el name_el coun_el draw_el, ///
idvariable (hhid) ///
model (story_el by read_el tell_el; ///
funpl_el by sing_el play_el name_el coun_el draw_el) ///
missing =. ///
output (STDYX modindices(ALL)) 
*but fit is better with it
*these subscales are quite correlated and have similar loads so even though EFA says not one factor..

* reliability of the seven retained activities as a single scale
alpha read_el tell_el sing_el play_el name_el coun_el draw_el, item
*0.8776
*my instinct is to do this for now
* same scale again, saving the score as ecdact_el
alpha read_el tell_el sing_el play_el name_el coun_el draw_el, item gen(ecdact_el)
*correlates very highly, 99%, with version of factor scores I was playing with


	*and at baseline*
	* Three parallel lists, position k in each belonging to the same activity:
	*   gates - the cg_envK_bl item; a value of 0 forces the activity score to 0
	*   freqs - the matching "a" follow-up item (item 10 is spelled cgenv10a_bl)
	*   acts  - stub for the new activity variable
	* NOTE(review): the second activity is named tbll_bl here although endline uses
	* tell_el; the name is kept as-is because other code may refer to tbll_bl.
	local gates cg_env1_bl cg_env2_bl cg_env3_bl cg_env4_bl cg_env5_bl cg_env6_bl cg_env7_bl cg_env8_bl cg_env9_bl cg_env10_bl
	local freqs cg_env1a_bl cg_env2a_bl cg_env3a_bl cg_env4a_bl cg_env5a_bl cg_env6a_bl cg_env7a_bl cg_env8a_bl cg_env9a_bl cgenv10a_bl
	local acts read tbll sing play name coun draw chor reli meal
	local n_acts : word count `acts'

	* interleave gate and follow-up items in questionnaire order
	local env_items
	forvalues k = 1/`n_acts' {
		local gate : word `k' of `gates'
		local freq : word `k' of `freqs'
		local env_items `env_items' `gate' `freq'
	}

	codebook `env_items'

	* -99 recoded to system missing
	mvdecode `env_items', mv(-99)

	* activity score = follow-up value, overwritten with 0 when the gate item is 0
	* (reli_bl is created under its final name, so no rename step is needed)
	forvalues k = 1/`n_acts' {
		local act  : word `k' of `acts'
		local gate : word `k' of `gates'
		local freq : word `k' of `freqs'
		gen `act'_bl=`freq'
		replace `act'_bl=0 if `gate'==0
	}

	* same seven activities as the endline ecdact_el scale
	alpha read_bl tbll_bl sing_bl play_bl name_bl coun_bl draw_bl, item gen(ecdact_bl)
	*0.8058


*child play*
*not pre-registered*

*doesn't correlate with caregiver child activities highly, interestingly, since many 
*same items only without adult component
* all endline child play items, listed once
local play_items_el fc_pl_1_el fc_pl_2_el fc_pl_21_el fc_pl_22_el fc_pl_23_el fc_pl_3_el fc_pl_31_el fc_pl_32_el ///
fc_pl_33_el fc_pl_34_el fc_pl_35_el fc_pl_36_el fc_pl_37_el

codebook `play_items_el'

* both -99 and -77 are recoded to system missing for these items
mvdecode `play_items_el', mv(-99=. \ -77=.)
	
* EFA in Mplus via runmplus, 1- to 5-factor solutions (call left exactly as written;
* fc_pl_2_el and fc_pl_3_el are not part of it)
runmplus fc_pl_1_el fc_pl_21_el fc_pl_22_el fc_pl_23_el fc_pl_31_el fc_pl_32_el ///
fc_pl_33_el fc_pl_34_el fc_pl_35_el fc_pl_36_el fc_pl_37_el, ///
type (efa 1 5) ///
missing =.
*three factor: 1 (weak), 21 (neg), 22; 31, 32, 33, 35; 34, 36, 37
*23 doesn't load
*two factor (poor model fit): 31, 32, 33, 35; 34, 36, 37

*the #2 items not doing much

* reliability: all seven 3x items, then the two EFA groupings separately
alpha fc_pl_31_el fc_pl_32_el fc_pl_33_el fc_pl_34_el fc_pl_35_el fc_pl_36_el fc_pl_37_el, item
alpha fc_pl_31_el fc_pl_32_el fc_pl_33_el  fc_pl_35_el, item
alpha fc_pl_34_el fc_pl_36_el fc_pl_37_el, item

*but they work well all together even though factor analyses only fit once all sorts of cross dependencies are added
*no factor analysis model really makes full sense to me
*(played with various CFA models too and they end up wanting everything to covary with everything)

*alpha same with or without laughing and play can be serious 
*and distribution of that variable is very skewed
*so am dropping

* final endline play score: six items, fc_pl_35_el left out
local play_keep_el fc_pl_31_el fc_pl_32_el fc_pl_33_el fc_pl_34_el fc_pl_36_el fc_pl_37_el
alpha `play_keep_el', item gen(playfreq_el)
*alpha 0.82


	*and at baseline*
	local play_items_bl fc_pl_1_bl fc_pl_2_bl fc_pl_21_bl fc_pl_22_bl fc_pl_23_bl fc_pl_3_bl fc_pl_31_bl fc_pl_32_bl ///
	fc_pl_33_bl fc_pl_34_bl fc_pl_35_bl fc_pl_36_bl fc_pl_37_bl

	codebook `play_items_bl'
	
	* -99 and -77 recoded to system missing, as at endline
	mvdecode `play_items_bl', mv(-99=. \ -77=.)
		
	* same six items as the endline score
	local play_keep_bl fc_pl_31_bl fc_pl_32_bl fc_pl_33_bl fc_pl_34_bl fc_pl_36_bl fc_pl_37_bl
	alpha `play_keep_bl', item gen(playfreq_bl)
	*0.71

	
	
*caregiver beliefs about play/learning through play*

* Inspect the endline "job" items and the play-importance / sel items.
codebook job1_el job2_el job3_el job4_el job5_el job6_el job7_el job8_el job9_el job10_el job11_el job55_el jobo_el 
codebook pl_imp0_el pl_imp1_el pl_imp2_el pl_imp3_el pl_imp4_el pl_imp5_el pl_imp6_el sel_12_el sel_12_o_el sel_22_el ///
sel_22_o_el sel_32_el sel_32_o_el sel_42_el sel_42_o_el sel_52_el sel_52_o_el


* meaning of the job items as recorded by the author:
*1=parenting/discipline; 2=teaching/education; 3=taking care of children
*4=fulfilling needs; 5=providing nutrition; 6=teaching them good behavior
*7=keeping children clean; 8=play; 9=providing love
*10=providing psychological support; 11=protection

* EFA of job1-job11 in Mplus via runmplus, 1- to 5-factor solutions.
runmplus job1_el job2_el job3_el job4_el job5_el job6_el job7_el job8_el job9_el job10_el job11_el, ///
type (efa 1 5) ///
missing =.

***I think these are probably best as descriptive***
sum job1_el job2_el job3_el job4_el job5_el job6_el job7_el job8_el job9_el job10_el job11_el


	***at baseline***
	codebook job1_bl job2_bl job3_bl job4_bl job5_bl job6_bl job7_bl job8_bl job9_bl job10_bl job11_bl job55_bl job_cg_o_bl 
	* list the non-empty free-text "other" answers with their household id
	list hhid job_cg_o_bl if job_cg_o_bl!=""
	*1=parenting/discipline; 2=teaching/education; 3=taking care of children
	*4=fulfilling needs; 5=providing nutrition; 6=teaching them good behavior
	*7=keeping children clean; 8=play; 9=providing love
	*10=providing psychological support; 11=protection
	*one said medicine which could be under fulfilling needs or taking care of
	* check how that household answered items 3 and 4 (hhid hard-coded from the listing above)
	list job3_bl job4_bl if hhid=="ah5849"




* endline play-importance and sel items: -99 recoded to system missing
local belief_items_el pl_imp0_el pl_imp1_el pl_imp2_el pl_imp3_el pl_imp4_el pl_imp5_el pl_imp6_el sel_12_el sel_12_o_el sel_22_el ///
sel_22_o_el sel_32_el sel_32_o_el sel_42_el sel_42_o_el sel_52_el sel_52_o_el
mvdecode `belief_items_el', mv(-99)
	
*create 4 point likerts from sel questions
* For each question q = 1..5, combine the answer (sel_q2_el) with its _o follow-up:
*   answer 0 & follow-up 1 -> 1
*   answer 0 & follow-up 0 -> 2
*   answer 1 & follow-up 0 -> 3
*   answer 1 & follow-up 1 -> 4
* Any other combination (including missing) stays missing.
forvalues q = 1/5 {
	local answer sel_`q'2_el
	local follow sel_`q'2_o_el
	gen sel`q'_el=.
	replace sel`q'_el=1 if `answer'==0 & `follow'==1
	replace sel`q'_el=2 if `answer'==0 & `follow'==0
	replace sel`q'_el=3 if `answer'==1 & `follow'==0
	replace sel`q'_el=4 if `answer'==1 & `follow'==1
}


*play 5 and 6 should reverse code
* EFA of the six play-importance items and five sel likerts in Mplus via runmplus,
* 1- to 5-factor solutions. Items 5 and 6 enter here as stored (not reversed).
runmplus pl_imp1_el pl_imp2_el pl_imp3_el pl_imp4_el pl_imp5_el pl_imp6_el ///
sel1_el sel2_el sel3_el sel4_el sel5_el, ///
type (efa 1 5) ///
missing =.
*by time you get ok fit results do not make a ton of sense
*but 1-4 play and sel items seems to generally hang together as 2 subscales
* NOTE(review): alpha without the asis option may reverse item signs on its own;
* check the sign column in the output for pl_imp5_el and pl_imp6_el.
alpha pl_imp1_el pl_imp2_el pl_imp3_el pl_imp4_el pl_imp5_el pl_imp6_el ///
sel1_el sel2_el sel3_el sel4_el sel5_el, item

* play items alone, with 5 and 6 explicitly reversed
alpha pl_imp1_el pl_imp2_el pl_imp3_el pl_imp4_el pl_imp5_el pl_imp6_el, rev(pl_imp5_el pl_imp6_el) item
* sel items alone
alpha sel1_el sel2_el sel3_el sel4_el sel5_el, item
* play items 5 and 6 tried together with the sel items
alpha pl_imp5_el pl_imp6_el sel1_el sel2_el sel3_el sel4_el sel5_el, item
*5 and 6 less clear where they go
*will not use (not in baseline anyway, we added them)

* final endline scales: play beliefs (items 1-4) and sel beliefs (items 1-5)
alpha pl_imp1_el pl_imp2_el pl_imp3_el pl_imp4_el, item gen(belpl_el)
*.95
alpha sel1_el sel2_el sel3_el sel4_el sel5_el, item gen(belsel_el)
*.90
sum belpl_el belsel_el pl_imp5_el pl_imp6_el
*added them because not a lot of variation in beliefs about play, which is still true.


	*and at baseline*
	* baseline play-importance and sel items: -99 recoded to system missing
	* (baseline has no pl_imp5 / pl_imp6)
	local belief_items_bl pl_imp0_bl pl_imp1_bl pl_imp2_bl pl_imp3_bl pl_imp4_bl sel_12_bl sel_12_o_bl sel_22_bl ///
	sel_22_o_bl sel_32_bl sel_32_o_bl sel_42_bl sel_42_o_bl sel_52_bl sel_52_o_bl
	mvdecode `belief_items_bl', mv(-99)
	
	*create 4 point likerts from sel questions
	* For each question q = 1..5, combine the answer (sel_q2_bl) with its _o follow-up:
	*   answer 0 & follow-up 1 -> 1
	*   answer 0 & follow-up 0 -> 2
	*   answer 1 & follow-up 0 -> 3
	*   answer 1 & follow-up 1 -> 4
	* Any other combination (including missing) stays missing.
	forvalues q = 1/5 {
		local answer sel_`q'2_bl
		local follow sel_`q'2_o_bl
		gen sel`q'_bl=.
		replace sel`q'_bl=1 if `answer'==0 & `follow'==1
		replace sel`q'_bl=2 if `answer'==0 & `follow'==0
		replace sel`q'_bl=3 if `answer'==1 & `follow'==0
		replace sel`q'_bl=4 if `answer'==1 & `follow'==1
	}

	* baseline scales matching belpl_el and belsel_el
	alpha pl_imp1_bl pl_imp2_bl pl_imp3_bl pl_imp4_bl, item gen(belpl_bl)
	*.90
	alpha sel1_bl sel2_bl sel3_bl sel4_bl sel5_bl, item gen(belsel_bl)
	*.73
	sum belpl_bl belsel_bl 
	*added them because not a lot of variation in beliefs about play, which is still true...

* checkpoint: write the data with all caregiver scales, then reload it
save "using.dta", replace
use "using.dta", clear

*child development (IDELA)
*IDELA 999 missing, n/a skipped via skip pattern*
codebook item1_1_Iel item1_2_Iel item1_3_Iel item1_4_Iel item1_5_Iel item2_1_Iel item2_2_Iel item2_3_Iel item2_4_Iel item3_1_Iel item3_2_Iel item4_1_Iel item4_2_Iel item4_3_Iel item4_4_Iel item4_5_Iel
*IDELA online scoring says to code missing as 0
* NOTE(review): the range below covers every variable stored between item1_1_Iel and
* item4_5_Iel in dataset order - confirm nothing else sits inside that range.
recode item1_1_Iel-item4_5_Iel (999=0)

* Domains 1-4 are each scored as the row mean of their parts (1 possible per part).
* Number of parts per domain, in order: 5, 4, 2, 5.
local parts_per_domain 5 4 2 5
forvalues d = 1/4 {
	local n_parts : word `d' of `parts_per_domain'

	* build the part list itemD_1_Iel ... itemD_n_Iel
	local parts
	forvalues p = 1/`n_parts' {
		local parts `parts' item`d'_`p'_Iel
	}

	codebook `parts'
	egen item`d'_el=rowmean(`parts')
	sum item`d'_el `parts'
}

* Item 5 (20 parts, 1 point each). Project note: numbers were added to item 5,
* but in the pilot it worked best with all of them. Only children who got
* enough of the first 10 were even asked the second 10.
* NOTE(review): no 999-to-0 recode is applied to item 5 here, unlike items 1-4
* and 6 onward; confirm the codebook output shows no 999 values.
local late5
forvalues p = 11/20 {
	local late5 `late5' item5_`p'_Iel
}
local all5
forvalues p = 1/20 {
	local all5 `all5' item5_`p'_Iel
}

* Second-ten parts are stored as strings: "n/a" becomes "0", then convert to numeric.
foreach part of varlist `late5' {
	replace `part' = "0" if `part' == "n/a"
	destring `part', replace
}
codebook `all5'

* Not sure why missingness (versus n/a) slightly increases in the second set.
* Where the child has a value on part 1, a system-missing second-ten part is scored 0.
foreach part of varlist `late5' {
	replace `part' = 0 if `part' == . & item5_1_Iel != .
}
egen item5_el = rowmean(item5_1_Iel-item5_20_Iel)
summarize item5_el item5_1_Iel-item5_20_Iel
	
* Item 6: 1 point possible for each scored part, 4 total.
local show6 item6_1_Iel item6_2_Iel item6_31_Iel item6_4_Iel item6_p1_Iel item6_p2_Iel
local score6 item6_1_Iel item6_2_Iel item6_31_Iel item6_4_Iel
codebook `show6'

* Numeric parts: 999 is scored 0.
foreach part of varlist item6_1_Iel item6_2_Iel item6_p1_Iel item6_p2_Iel {
	replace `part' = 0 if `part' == 999
}
* String parts: "999" and "n/a" become "0" before converting to numeric.
foreach part of varlist item6_31_Iel item6_4_Iel {
	replace `part' = "0" if `part' == "999"
	replace `part' = "0" if `part' == "n/a"
	destring `part', replace
}

codebook `show6'
* Project note: not everything is just missing; 184 are truly missing the endline
* IDELA, and IDELA likes missing to be 0. So a system-missing value is scored 0
* only when part 1 of item 6 is present.
foreach part of varlist item6_31_Iel item6_4_Iel {
	replace `part' = 0 if `part' == . & item6_1_Iel != .
}

egen item6_el = rowmean(`score6')
summarize item6_el `score6'

* Inspect items 7-16 before cleaning.
codebook item7_1_Iel-item16_2_Iel
tabulate item9_Iel
tabulate item15_1_Iel
tabulate item15_2_Iel

* Numeric parts of items 7-16: 999 is scored 0.
local num716 item7_1_Iel item7_2_Iel item7_3_Iel item9_Iel ///
	item10_1_Iel item10_2_Iel item10_3_Iel item11_1_Iel item11_2_Iel item12_1_Iel ///
	item13_1_Iel item13_2_Iel item13_3_Iel item13_4_Iel item15_1_Iel item15_2_Iel ///
	item16_1_Iel item16_2_Iel
foreach part of varlist `num716' {
	replace `part' = 0 if `part' == 999
}

* A -99 on item 9 is scored 0 (should check where this came from).
replace item9_Iel = 0 if item9_Iel == -99
* Negative counts are assumed to be typos for the same positive number.
replace item9_Iel = 3 if item9_Iel == -3
foreach k in 8 5 {
	replace item15_1_Iel = `k' if item15_1_Iel == -`k'
}
foreach k in 8 5 6 {
	replace item15_2_Iel = `k' if item15_2_Iel == -`k'
}

* String parts: "999" and "n/a" become "0" before converting to numeric.
foreach part of varlist item11_3_Iel item12_2_Iel {
	replace `part' = "0" if `part' == "999"
	replace `part' = "0" if `part' == "n/a"
	destring `part', replace
}

* Item 7: 1 point per part, 3 total.
local p7 item7_1_Iel item7_2_Iel item7_3_Iel
codebook `p7'
egen item7_el = rowmean(`p7')
summarize item7_el `p7'

* Item 9: single count, rescaled by dividing by 10.
codebook item9_Iel
summarize item9_Iel, detail
tabulate item9_Iel
generate item9_el = item9_Iel/10

* Item 10: original note says 1 point per part, 3 total.
* NOTE(review): only parts 1 and 2 enter the mean; part 3 is displayed but not
* scored. Baseline does the same. Confirm this omission is intentional.
codebook item10_1_Iel item10_2_Iel item10_3_Iel
egen item10_el = rowmean(item10_1_Iel item10_2_Iel)
summarize item10_el item10_1_Iel item10_2_Iel item10_3_Iel

* Item 11: 1 point per part, 3 total. A system-missing part 3 is scored 0
* when part 2 is present.
local p11 item11_1_Iel item11_2_Iel item11_3_Iel
codebook `p11'
replace item11_3_Iel = 0 if item11_3_Iel == . & item11_2_Iel != .
egen item11_el = rowmean(`p11')
summarize item11_el `p11'

* Item 12: 1 point per part, 2 total. A system-missing part 2 is scored 0
* when part 1 is present.
local p12 item12_1_Iel item12_2_Iel
codebook `p12'
replace item12_2_Iel = 0 if item12_2_Iel == . & item12_1_Iel != .

egen item12_el = rowmean(`p12')
summarize item12_el `p12'

* Item 13: 1 point per part, 4 total.
local p13 item13_1_Iel item13_2_Iel item13_3_Iel item13_4_Iel
codebook `p13'
egen item13_el = rowmean(`p13')
summarize item13_el `p13'

***NO ITEM 14**

* Item 15: each part is out of 10, so the row mean is divided by 10.
local p15 item15_1_Iel item15_2_Iel
codebook `p15'
egen item15_el = rowmean(`p15')
summarize item15_el `p15'
replace item15_el = item15_el/10
summarize item15_el `p15'

* Item 16: 1 point per part, 2 total.
local p16 item16_1_Iel item16_2_Iel
codebook `p16'
egen item16_el = rowmean(`p16')
summarize item16_el `p16'

* Item 17: 20 parts, 1 point each.
* NOTE(review): no 999-to-0 recode is applied to item 17 here; confirm the
* codebook output shows no 999 values.
local all17
forvalues p = 1/20 {
	local all17 `all17' item17_`p'_Iel
}
local late17
forvalues p = 11/20 {
	local late17 `late17' item17_`p'_Iel
}

codebook `all17'
* Second-ten parts are stored as strings: "n/a" becomes "0", then convert to numeric.
foreach part of varlist `late17' {
	replace `part' = "0" if `part' == "n/a"
	destring `part', replace
}
codebook `all17'
* A system-missing second-ten part is scored 0 when part 1 is present.
foreach part of varlist `late17' {
	replace `part' = 0 if `part' == . & item17_1_Iel != .
}

codebook `all17'
egen item17_el = rowmean(item17_1_Iel-item17_20_Iel)
summarize item17_el item17_1_Iel-item17_20_Iel


codebook item18_1_Iel-item22_8_Iel

* Negative values are assumed to be typos for the same positive number.
replace item19_Iel = 4 if item19_Iel == -4
replace item21_1_Iel = 3 if item21_1_Iel == -3

* el_Iel is dropped before the range-based recode that follows.
drop el_Iel

* 999 is scored 0 for everything from item18_1 through item22_8 (dataset order).
foreach part of varlist item18_1_Iel-item22_8_Iel {
	replace `part' = 0 if `part' == 999
}

* Item 18: 1 point per part, 2 total.
local p18 item18_1_Iel item18_2_Iel
codebook `p18'
egen item18_el = rowmean(`p18')
summarize item18_el `p18'

* Item 19: single value divided by 4.
codebook item19_Iel
generate item19_el = item19_Iel/4

* Item 20: 1 point per part, 5 total (the p1/p2 variables are shown, not scored).
local p20 item20_1_Iel item20_2_Iel item20_3_Iel item20_4_Iel item20_5_Iel
codebook `p20' item20p1_Iel item20p2_Iel
egen item20_el = rowmean(`p20')
summarize item20_el `p20'

* Item 21: part 1 is divided by 3 and then averaged with part 2.
codebook item21_1_Iel item21_2_Iel
generate item21_1_mean_el = item21_1_Iel/3
egen item21_el = rowmean(item21_1_mean_el item21_2_Iel)
summarize item21_el item21_1_mean_el item21_2_Iel

* Item 22: 1 point per part, 8 total.
local p22
forvalues p = 1/8 {
	local p22 `p22' item22_`p'_Iel
}
codebook `p22'
egen item22_el = rowmean(`p22')
summarize item22_el `p22'

* Item 24: -10 is assumed to be a typo for 10; 999 is scored 0; then divide by 10.
codebook item24_Iel
tabulate item24_Iel
replace item24_Iel = 10 if item24_Iel == -10
replace item24_Iel = 0 if item24_Iel == 999
generate item24_el = item24_Iel/10
codebook item24_el item24_Iel

* Item 28: 1 point per part, 6 total. (Not using "language used" here.)
local p28
forvalues p = 1/6 {
	local p28 `p28' item28_`p'_Iel
}
codebook `p28'
foreach part of varlist `p28' {
	replace `part' = 0 if `part' == 999
}
codebook `p28'
egen item28_el = rowmean(`p28')
summarize item28_el `p28'

*IDELA DOMAIN SCORES ENDLINE:
* Each domain score is the row mean of its item scores.

* Emergent literacy (item 28 was added for literacy and is included).
local lit_items item15_el item16_el item17_el item18_el item19_el item20_el item28_el
egen IDLIT_el = rowmean(`lit_items')

* Emergent numeracy.
local num_items item2_el item3_el item4_el item5_el item6_el item7_el
egen IDNUM_el = rowmean(`num_items')

* Social emotional.
local sel_items item1_el item9_el item10_el item11_el item12_el
egen IDSEL_el = rowmean(`sel_items')

* Motor.
local mot_items item21_el item22_el item24_el
egen IDMOT_el = rowmean(`mot_items')
summarize IDLIT_el IDNUM_el IDSEL_el IDMOT_el

*****ITEM 13 WAS ADDED AS EXECUTIVE FUNCTION SO NOT INCLUDED IN THESE RUNS*****


	*and at baseline*
	* A few extra items exist at baseline; they were dropped for not performing
	* well at BL. The calculations mirror endline.
	local sub1 item1_1_Ibl item1_2_Ibl item1_3_Ibl item1_4_Ibl item1_5_Ibl
	local sub2 item2_1_Ibl item2_2_Ibl item2_3_Ibl item2_4_Ibl
	local sub3 item3_1_Ibl item3_2_Ibl
	local sub4 item4_1_Ibl item4_2_Ibl item4_3_Ibl item4_4_Ibl item4_5_Ibl
	codebook `sub1' `sub2' `sub3' `sub4'
	* 999 is scored 0 across item1_1 through item4_5 (dataset order).
	foreach part of varlist item1_1_Ibl-item4_5_Ibl {
		replace `part' = 0 if `part' == 999
	}

	* Item score = row mean of its parts, for items 1-4 in turn.
	forvalues q = 1/4 {
		codebook `sub`q''
		egen item`q'_bl = rowmean(`sub`q'')
		summarize item`q'_bl `sub`q''
	}

	* Item 5 at baseline (20 parts, 1 point each).
	local late5
	forvalues p = 11/20 {
		local late5 `late5' item5_`p'_Ibl
	}
	local all5
	forvalues p = 1/20 {
		local all5 `all5' item5_`p'_Ibl
	}
	* Second-ten parts are stored as strings: "n/a" becomes "0", then convert to numeric.
	foreach var of varlist `late5' {
		replace `var'="0" if `var'=="n/a"
		destring `var', replace
	}
	codebook `all5'
	* Mirror the endline rule: a system-missing second-ten part is scored 0 when the
	* child has a value on part 1. Without this, rowmean() averages over fewer parts
	* at baseline than at endline for the same response pattern.
	foreach var of varlist `late5' {
		replace `var'=0 if `var'==. & item5_1_Ibl!=.
	}
	egen item5_bl=rowmean(item5_1_Ibl-item5_20_Ibl)
	sum item5_bl item5_1_Ibl-item5_20_Ibl
	
	* Item 6 at baseline: 4 scored parts (the p1/p2 variables are shown, not scored).
	codebook item6_1_Ibl item6_2_Ibl item6_31_Ibl item6_4_Ibl item6_p1_Ibl item6_p2_Ibl
	* Numeric parts: 999 is scored 0.
	foreach var of varlist item6_1_Ibl item6_2_Ibl item6_p1_Ibl item6_p2_Ibl {
	replace `var'=0 if `var'==999
	}
	* String parts: "999" and "n/a" become "0" before converting to numeric.
	foreach var of varlist item6_31_Ibl item6_4_Ibl {
	replace `var'="0" if `var'=="999"
	replace `var'="0" if `var'=="n/a"
	destring `var', replace
	}
	
	codebook item6_1_Ibl item6_2_Ibl item6_31_Ibl item6_4_Ibl item6_p1_Ibl item6_p2_Ibl
	* Mirror the endline rule: a system-missing value on these two parts is scored 0
	* when part 1 of item 6 is present, so both waves use the same denominator.
	foreach var of varlist item6_31_Ibl item6_4_Ibl {
	replace `var'=0 if `var'==. & item6_1_Ibl !=.
	}
	egen item6_bl=rowmean( item6_1_Ibl item6_2_Ibl item6_31_Ibl item6_4_Ibl)
	sum item6_bl  item6_1_Ibl item6_2_Ibl item6_31_Ibl item6_4_Ibl

	codebook item7_1_Ibl-item16_2_Ibl
	* Numeric parts of items 7-16: 999 is scored 0.
	local num716 item7_1_Ibl item7_2_Ibl item7_3_Ibl item9_Ibl ///
		item10_1_Ibl item10_2_Ibl item10_3_Ibl item11_1_Ibl item11_2_Ibl item12_1_Ibl ///
		item13_1_Ibl item13_2_Ibl item13_3_Ibl item13_4_Ibl item15_1_Ibl item15_2_Ibl ///
		item16_1_Ibl item16_2_Ibl
	foreach part of varlist `num716' {
		replace `part' = 0 if `part' == 999
	}

	* String parts: "999" and "n/a" become "0" before converting to numeric.
	foreach part of varlist item11_3_Ibl item12_2_Ibl {
		replace `part' = "0" if `part' == "999"
		replace `part' = "0" if `part' == "n/a"
		destring `part', replace
	}

	* Item 7: row mean of 3 parts.
	local p7 item7_1_Ibl item7_2_Ibl item7_3_Ibl
	codebook `p7'
	egen item7_bl = rowmean(`p7')
	summarize item7_bl `p7'

	* Item 9: single count divided by 10.
	codebook item9_Ibl
	summarize item9_Ibl, detail
	tabulate item9_Ibl
	generate item9_bl = item9_Ibl/10

	* Item 10: only parts 1 and 2 enter the mean; part 3 is displayed only.
	codebook item10_1_Ibl item10_2_Ibl item10_3_Ibl
	egen item10_bl = rowmean(item10_1_Ibl item10_2_Ibl)
	summarize item10_bl item10_1_Ibl item10_2_Ibl item10_3_Ibl
	
	* Item 11: a system-missing part 3 is scored 0 when part 2 is present.
	local p11 item11_1_Ibl item11_2_Ibl item11_3_Ibl
	codebook `p11'
	replace item11_3_Ibl = 0 if item11_3_Ibl == . & item11_2_Ibl != .
	egen item11_bl = rowmean(`p11')
	summarize item11_bl `p11'

	* Item 12 at baseline: 2 parts, scored as their row mean.
	codebook item12_1_Ibl item12_2_Ibl
	* Mirror the endline rule: a system-missing part 2 is scored 0 when part 1 is
	* present, so the baseline and endline item 12 scores are built the same way.
	replace item12_2_Ibl=0 if item12_2_Ibl==. & item12_1_Ibl !=.
	egen item12_bl=rowmean(item12_1_Ibl item12_2_Ibl)
	sum item12_bl item12_1_Ibl item12_2_Ibl

	* Item 13: row mean of 4 parts.
	local p13 item13_1_Ibl item13_2_Ibl item13_3_Ibl item13_4_Ibl
	codebook `p13'
	egen item13_bl = rowmean(`p13')
	summarize item13_bl `p13'

	* Item 15: row mean of 2 parts, then divided by 10.
	local p15 item15_1_Ibl item15_2_Ibl
	codebook `p15'
	egen item15_bl = rowmean(`p15')
	summarize item15_bl `p15'
	replace item15_bl = item15_bl/10
	summarize item15_bl `p15'

	* Item 16: row mean of 2 parts.
	local p16 item16_1_Ibl item16_2_Ibl
	codebook `p16'
	egen item16_bl = rowmean(`p16')
	summarize item16_bl `p16'

	* Item 17 at baseline (20 parts, 1 point each).
	local all17
	forvalues p = 1/20 {
		local all17 `all17' item17_`p'_Ibl
	}
	local late17
	forvalues p = 11/20 {
		local late17 `late17' item17_`p'_Ibl
	}
	codebook `all17'
	* Second-ten parts are stored as strings: "n/a" becomes "0", then convert to numeric.
	foreach var of varlist `late17' {
	replace `var'="0" if `var'=="n/a"
	destring `var', replace
	}
	codebook `all17'
	* Mirror the endline rule: a system-missing second-ten part is scored 0 when
	* part 1 is present, so both waves average over the same number of parts.
	foreach var of varlist `late17' {
	replace `var'=0 if `var'==. & item17_1_Ibl!=.
	}

	codebook `all17'
	egen item17_bl=rowmean(item17_1_Ibl-item17_20_Ibl)
	sum item17_bl item17_1_Ibl-item17_20_Ibl


	codebook item18_1_Ibl-item22_8_Ibl

	* ep_Ibl is dropped before the range-based recode that follows.
	drop ep_Ibl

	* 999 is scored 0 for everything from item18_1 through item22_8 (dataset order).
	foreach part of varlist item18_1_Ibl-item22_8_Ibl {
		replace `part' = 0 if `part' == 999
	}

	* Item 18: row mean of 2 parts.
	local p18 item18_1_Ibl item18_2_Ibl
	codebook `p18'
	egen item18_bl = rowmean(`p18')
	summarize item18_bl `p18'

	* Item 19: single value divided by 4.
	codebook item19_Ibl
	generate item19_bl = item19_Ibl/4

	* Item 20: row mean of 5 parts (the p1/p2 variables are shown, not scored).
	local p20 item20_1_Ibl item20_2_Ibl item20_3_Ibl item20_4_Ibl item20_5_Ibl
	codebook `p20' item20p1_Ibl item20p2_Ibl
	egen item20_bl = rowmean(`p20')
	summarize item20_bl `p20'
	
	* Item 21: part 1 is divided by 3 and then averaged with part 2.
	codebook item21_1_Ibl item21_2_Ibl
	generate item21_1_mean_bl = item21_1_Ibl/3
	egen item21_bl = rowmean(item21_1_mean_bl item21_2_Ibl)
	summarize item21_bl item21_1_mean_bl item21_2_Ibl

	* Item 22: row mean of 8 parts.
	local p22
	forvalues p = 1/8 {
		local p22 `p22' item22_`p'_Ibl
	}
	codebook `p22'
	egen item22_bl = rowmean(`p22')
	summarize item22_bl `p22'

	* Item 24: 999 is scored 0, then divide by 10.
	codebook item24_Ibl
	tabulate item24_Ibl
	replace item24_Ibl = 0 if item24_Ibl == 999
	generate item24_bl = item24_Ibl/10
	codebook item24_bl item24_Ibl

	* Item 28: 999 is scored 0, then row mean of 6 parts.
	local p28
	forvalues p = 1/6 {
		local p28 `p28' item28_`p'_Ibl
	}
	codebook `p28'
	foreach part of varlist `p28' {
		replace `part' = 0 if `part' == 999
	}
	codebook `p28'
	egen item28_bl = rowmean(`p28')
	summarize item28_bl `p28'

*IDELA DOMAIN SCORES BASELINE:
* Each domain score is the row mean of its item scores (same item sets as endline).

* Emergent literacy.
local lit_items item15_bl item16_bl item17_bl item18_bl item19_bl item20_bl item28_bl
egen IDLIT_bl = rowmean(`lit_items')

* Emergent numeracy.
local num_items item2_bl item3_bl item4_bl item5_bl item6_bl item7_bl
egen IDNUM_bl = rowmean(`num_items')

* Social emotional.
local sel_items item1_bl item9_bl item10_bl item11_bl item12_bl
egen IDSEL_bl = rowmean(`sel_items')

* Motor.
local mot_items item21_bl item22_bl item24_bl
egen IDMOT_bl = rowmean(`mot_items')

summarize IDLIT_bl IDNUM_bl IDSEL_bl IDMOT_bl


	
	
**COVARIATES**

*key demographic questions from caregiver survey*
*plus baseline of outcomes (see above)

*SES*
* Inspect the baseline education, literacy, marital-status, resource (prei_*)
* and household variables before cleaning.
codebook b_educ_bl b_read_n_bl b_marry_bl b_educ_s_bl ///
prei_1_bl prei_9_bl prei_10_bl prei_11_bl prei_14_bl prei_15_bl ///
prei_16_bl prei_17_bl prei_21_bl prei_22_bl prei_24_bl prei_25_bl prei_26_bl ///
hh_l_sta_bl h_food_bl prei_mod1_bl prei_mod2_bl hh_mi_np_bl hh_i_b_bl
*going to use education and literacy as is for now
*skipping marital status because so little variation

* The codes -33, -77 and -99 are all set to Stata missing for these variables.
foreach var of varlist b_educ_bl b_read_n_bl b_marry_bl b_educ_s_bl ///
prei_1_bl prei_9_bl prei_10_bl prei_11_bl prei_14_bl prei_15_bl ///
prei_16_bl prei_17_bl prei_21_bl prei_22_bl prei_24_bl prei_25_bl prei_26_bl ///
hh_l_sta_bl h_food_bl prei_mod1_bl prei_mod2_bl hh_mi_np_bl hh_i_b_bl {
replace `var'=. if `var'==-33
replace `var'=. if `var'==-77
replace `var'=. if `var'==-99
}



* Exploratory factor analysis through the user-written runmplus command
* (installed from SSC near the top of this file), requesting 1 to 5 factors.
* NOTE(review): the option spelling below is left exactly as written; how
* runmplus parses these options is not visible in this file.
runmplus prei_1_bl prei_9_bl prei_10_bl prei_11_bl prei_14_bl prei_15_bl ///
prei_16_bl prei_17_bl prei_21_bl prei_22_bl prei_24_bl prei_25_bl prei_26_bl ///
hh_l_sta_bl h_food_bl prei_mod1_bl prei_mod2_bl hh_mi_np_bl hh_i_b_bl, ///
idvariable (hhid) ///
type (efa 1 5) ///
missing =. 
*looking like 1 9 10 11 16 17 sta food b_b
*and then 14 15 24 25  (with 24 loading very strongly)

*in other words sufficient resources/space (incl. healthcare/transit) / stability
*and then help/support

* Build the two scales suggested by the factor analysis and record their alphas.
alpha prei_1_bl prei_9_bl prei_10_bl prei_11_bl prei_16_bl prei_17_bl ///
hh_l_sta_bl h_food_bl hh_i_b_bl, item gen(ses_bl)
*0.7347
alpha prei_14_bl prei_15_bl prei_24_bl prei_25_bl, item gen(persup_bl)
*0.7739
*feel like these two might still be useful
codebook prei_mod1_bl prei_mod2_bl
pwcorr prei_mod1_bl prei_mod2_bl
*so let's come back to them*
*still going to impute for now though in case*


*child gender*
* Compare the sex recorded at the baseline and endline IDELA.
tabulate child_sex_Ibl child_sex_Iel
* Not an exact match, sadly: 27 switch.
codebook child_sex_Ibl child_sex_Iel
* Sex is not available for everyone, because it was asked in the IDELA and not
* in the caregiver survey. Plan: set the 27 conflicting cases to missing and
* then impute all missing values.
* Take the baseline value, falling back to endline when baseline is system-missing.
generate Chfem = cond(child_sex_Ibl == ., child_sex_Iel, child_sex_Ibl)
* Blank out cases where both waves are present but disagree.
replace Chfem = . if child_sex_Ibl != . & child_sex_Iel != . & child_sex_Ibl != child_sex_Iel
codebook Chfem


*family nationality
*in registration data, merging in below

*household risk (resources in SES above)*
	*people in household*
	codebook hh_num_bl hh_ch_u6_bl hh_ch_12_bl hh_ch_18_bl tot_ch_bl
	* The code -99 is set to Stata missing for the two older-child counts.
	replace hh_ch_12_bl=. if hh_ch_12_bl==-99
	replace hh_ch_18_bl=. if hh_ch_18_bl==-99
	* Interactive check that the total equals the sum of the three age groups.
	br hh_ch_u6_bl hh_ch_12_bl hh_ch_18_bl tot_ch_bl
	*is sum as it should be
	destring tot_ch_bl, replace
	* Sanity check: list households whose size does not exceed their child count.
	* Stata treats missing as larger than any number, so without the missing()
	* guard every row lacking tot_ch_bl would be listed as a false alarm.
	list hh_num_bl hh_ch_u6_bl hh_ch_12_bl hh_ch_18_bl tot_ch_bl if hh_num_bl<=tot_ch_bl & !missing(tot_ch_bl)
	*seems good, did not ask how many adults so cannot check
	
	*moves/mobility*
	codebook b_hom_l_bl b_move_t_bl
	replace b_hom_l_bl=. if b_hom_l_bl==-99
	replace b_move_t_bl=. if b_move_t_bl==-99
	sum b_hom_l_bl b_move_t_bl
	*I think moves is the more relevant risk var
	
*caregiver and child health*
* -99 becomes missing; h_sleepy_bl is inspected but not recoded here.
local health h_o_gen_bl h_o_ment_bl h_sleep_bl fc_bwy_bl fc_heal_bl
foreach v of varlist `health' {
	replace `v' = . if `v' == -99
}
codebook h_o_gen_bl h_o_ment_bl h_sleep_bl h_sleepy_bl fc_bwy_bl fc_heal_bl

*caregiver social support*
local support ss_famil_bl ss_frien_bl ss1_bl ss2_bl ss3_bl ss4_bl ss5_bl ss6_bl ss7_bl ss8_bl
foreach v of varlist `support' {
	replace `v' = . if `v' == -99
}
codebook `support'
* Plan: impute all of these, but only control for friends and family for now;
* the others may be interesting descriptives.

*not listed but child temperament/character*
local temper fc_pl_43_bl fc_pl_48_bl fc_pl_49_bl fc_pl_411_bl fc_pl_416_bl fc_pl_418_bl fc_pl_419_bl ///
	fc_pl_51_bl fc_pl_511_bl fc_pl_514_bl fc_pl_519_bl
codebook `temper'

foreach v of varlist `temper' {
	replace `v' = . if `v' == -99
}
*imputing so we have

*for further descriptives*
* Both -99 and -55 are set to missing for these variables.
local descr who_surv_bl who_surv_el p_env11_bl p_env12_bl p_env24_bl p_env25_bl p_env26_bl p_env27_bl p_env28_bl
foreach v of varlist `descr' {
	replace `v' = . if `v' == -99
	replace `v' = . if `v' == -55
}

codebook `descr'

* Variables not being used or imputed:
*b_part_h_bl b_part_y_bl b_part_0_bl
*f_agai_bl f_agai_y_bl f_reco_bl f_reco_y_bl f_sugg_bl f_sugg_d_bl f_enj_bl f_enj_d_bl f_oth_co_bl overall_notes_bl
*f_oth_co_el overall_notes_el
*b_marry_bl  prei_21_bl prei_22_bl prei_26_bl hh_mi_np_bl b_hom_l_bl
*h_sleepy_bl h_sleep_bl fc_bwy_bl

*** Endline SES variables: still to decide what to do with them ***
local ses_el b_educ_el b_read_n_el b_hom_l_el b_move_t_el hh_num_el hh_ch_u6_el hh_ch_12_el hh_ch_18_el ///
	prei_mod1_el prei_mod2_el h_o_ment_el
codebook `ses_el'
* Only endline literacy gets the -99 to missing recode here.
replace b_read_n_el = . if b_read_n_el == -99



* Checkpoint the working data, then reload it.
save "using", replace
use "using", clear


*****MERGE in NATIONALITY****
* Build a lookup of caregiver nationality keyed on hhid and save it for merging.
* The statement order matters: exact duplicates are dropped, "n/a" rows are
* removed, three households are assigned a combined code, and duplicates are
* dropped again.
use "~/nationality.dta", clear
codebook temp_id
* Normalise the key: lower-case, then trim surrounding whitespace.
gen hhid1=strlower(temp_id)
gen hhid=ustrtrim(hhid1)
sort hhid
keep hhid nationality_cg
drop if hhid==""
codebook hhid
duplicates drop
codebook hhid
*still some duplicate hhids
sort hhid
* Show every row whose hhid also appears on an adjacent row.
list if hhid==hhid[_n-1] | hhid==hhid[_n+1]
drop if nationality_cg=="n/a" /*not a helpful merge in anyway */
codebook hhid
list if hhid==hhid[_n-1] | hhid==hhid[_n+1]
* Give these three households one combined nationality code so the second
* duplicates drop collapses their rows.
replace nationality_cg="lebsyr" if hhid=="ez5879"
replace nationality_cg="palsyr" if hhid=="kh5305"
replace nationality_cg="lebsyr" if hhid=="mb9531"
duplicates drop
codebook hhid 
list hhid
* NOTE(review): as written this replace is a no-op (same literal on both sides);
* confirm what correction was intended for this id.
replace hhid="jxmgc" if hhid=="jxmgc"
sort hhid
save "merge-in_nationality.dta", replace

****I AM GOING TO NEED CLUSTERING VARIABLES TOO****
* Build a lookup of arm, classroom, teacher and assistant keyed on hhid from the
* randomization workbook, and save it for merging.
* The path uses forward slashes, which Stata accepts on every platform (the
* original backslashes only resolve on Windows).
import excel using "~/RANDOMIZATION/Final_Class_Randomization_03242022.xlsx", ///
sheet("Final_Class_Randomization_03242") firstrow case(lower) clear
codebook trackingsheetid
* Lower-case and trim the key, as is done for the nationality lookup, so stray
* whitespace in the sheet cannot silently block a match on hhid.
gen hhid=ustrtrim(strlower(trackingsheetid))
codebook classroom teacher assistant
* "NA" marks cells with no assignment; blank them so destring yields missing.
replace classroom ="" if classroom=="NA" /*these are tx kids*/
replace teacher ="" if teacher=="NA" /*these are tx kids*/
replace assistant ="" if assistant=="NA" /*these are tx and ELP kids*/
destring classroom teacher assistant, replace
keep hhid arm classroom teacher assistant
sort hhid
save "merge-in_classrooms.dta", replace


* Reload the working data and attach the nationality and classroom lookups.
use "using", clear
* Inspect and remove the _merge left over from an earlier merge so the next
* merge can create its own.
tab _merge
drop _merge
sort hhid 
merge 1:1 hhid using "merge-in_nationality.dta"
*only 1138 matched
*so 469 without nationality data plus 3 with two nationalities
*521 not in our data
* Drop lookup-only rows; keep the merge indicator under a descriptive name.
drop if _merge==2
rename _merge _nationality_merge

merge 1:1 hhid using "merge-in_classrooms.dta"
*all but the extra one added after we randomized match as expected
drop _merge

tab nationality_cg, missing
* Earlier coding idea, left commented out:
*gen nationa=.
*replace nationa=0 if nationality_cg=="syrian" | nationality_cg=="palsyr"
*replace nationa=1 if nationality_cg=="lebanese"
*going to code the Palestinian/Syrian as Syrian (since a group of 1 won't run regardless)
*and leave the two listed as both Syrian and Lebanese as missing for now

codebook classroom teacher assistant

sort hhid
save "using", replace


* Prepare the registration covariates file for merging on hhid.
use "C:\Users\Kathryn Schwartz\Box\ECDEC - Shared Files\04_Projects\Ahlan Simsim\Lebanon CENTER\6_RCT\Data\Quantitative\Registration\registration_covariates.dta", clear
codebook
drop if hhid == ""
* Collapse childGender to 0/1: codes 2 and 4 become 0, codes 1 and 3 become 1.
replace childGender = 0 if inlist(childGender, 2, 4)
replace childGender = 1 if inlist(childGender, 1, 3)
* Cross-tabulate the two differently-cased Washington Group score variables.
tabulate WashingtonGroupScore washingtongroupscore

sort hhid
save "reg_cov", replace

* Attach the registration covariates and reconcile child gender.
use "using", clear

* Use the explicit 1:1 form, as the nationality and classroom merges do. The
* deprecated "merge hhid using" form does not verify that hhid is unique, so a
* duplicated key would be paired silently instead of stopping with an error.
merge 1:1 hhid using "reg_cov"
tab _merge
*all merged
drop _merge
tab Chfem childGender
* Cases where the IDELA-based gender disagrees with registration.
list hhid Chfem childGender if Chfem!=childGender & Chfem!=.
*per Lina's email
*bvsoo – female for sure since it is a female name (is what we had)
*gc1998 – female per our masterlist, the name is not an indication but the teacher never corrected us
replace Chfem=1 if hhid=="gc1998"
*jm6431 – Male for sure since it is a male name
replace Chfem=0 if hhid=="jm6431"
*mu5639 – Male for sure since it is a male name
replace Chfem=0 if hhid=="mu5639"
*xe8961 – female for sure since it is a female name
replace Chfem=1 if hhid=="xe8961"
*md1979– female for sure since it is a female name (is what we had)
*pw1071– Male for sure since it is a male name
replace Chfem=0 if hhid=="pw1071"
*wpnmv – female for sure since it is a female name
replace Chfem=1 if hhid=="wpnmv"
*zl1148 – Male for sure since it is a male name (is what we had)
*gz7204 – Male for sure since it is a male name
replace Chfem=0 if hhid=="gz7204"
*zl3019 – female per our masterlist, the name is not an indication but the teacher never corrected us
*(is what we had)

* Lebanese indicator: 1 when nation equals 1, otherwise 0 (including missing nation).
codebook nation
tabulate nationality_cg nation, missing
generate lebanese = (nation == 1)


* How often does the survey respondent differ between endline and baseline?
tabulate who_surv_el who_surv_bl, missing
count if who_surv_el != . & who_surv_bl != . & who_surv_el != who_surv_bl
display 348/1330
tabulate who_surv_bl ch_child_rel
codebook who_surv_bl ch_child_rel
tabulate ch_child_rel
tabulate ch_child_rel, nolabel
* Map the registration relationship codes onto the survey respondent codes.
generate who_surv_reg = .
replace who_surv_reg = 1 if ch_child_rel == 11 /*mother*/
replace who_surv_reg = 2 if ch_child_rel == 9 /*father*/
replace who_surv_reg = 3 if ch_child_rel == 8 /*brother*/
replace who_surv_reg = 4 if ch_child_rel == 12 /*sister*/
replace who_surv_reg = 5 if ch_child_rel == 7 /*aunt*/
replace who_surv_reg = 6 if ch_child_rel == 13 /*uncle*/
replace who_surv_reg = 7 if ch_child_rel == 10 /*grandmother*/
replace who_surv_reg = 8 if inlist(ch_child_rel, 1, 2) /*grandfather*/
replace who_surv_reg = 9 if ch_child_rel == 3 /*sister-in-law*/
replace who_surv_reg = 10 if inlist(ch_child_rel, 4, 5, 6) /*stepmother*/
count if who_surv_reg != . & who_surv_bl != . & who_surv_reg != who_surv_bl
tabulate who_surv_reg who_surv_bl
display 98/1461


* Caregiver-change flag: 1 only when both waves are recorded and differ, else 0.
generate cg_change = (who_surv_el != who_surv_bl & who_surv_el != . & who_surv_bl != .)

* Also important to interact this in the imputation model.

tabulate governorate
tabulate district
tabulate fam_type

tabulate n_siblings
codebook hh_ch_u6_el hh_ch_12_el hh_ch_18_el n_siblings
tabulate n_siblings hh_ch_u6_el
summarize n_siblings hh_ch_u6_el
pwcorr n_siblings hh_ch_u6_el
* Going to stick with what we had already here.


* Attach treatment-family attendance and interaction counts.
sort hhid
* Use the explicit 1:1 form, as the nationality and classroom merges do. The
* deprecated "merge hhid using" form does not check key uniqueness, so a
* duplicated hhid would be paired silently instead of stopping with an error.
merge 1:1 hhid using "txfamilies_additionalinfo"
tab _merge
drop if _merge==2
drop _merge
tab tx totalattendance
by tx, sort: sum totalattendance, det
di 21.34/31
di 22.08/31

by tx, sort: sum totalinteractions, det

tab totalattendance if tx==1
tab totalattendance if tx==2

tab totalinteractions if tx==1
tab totalinteractions if tx==2

tab totalattendance totalinteractions if tx==1
tab totalattendance totalinteractions if tx==2

* Checkpoint before the zero-fills below; the file is saved again at the end.
save "using", replace
* Families with tx==0 get zero attendance.
replace totalattendance=0 if tx==0
tab tx totalattendance, missing

*the one still missing was assigned tx but did not decide to enroll until midway through program
*so they were offered the program when the control group got it (meaning 0 attendance during study)
*still in tx arm for ITT but will make attendance 0
replace totalattendance=0 if totalattendance==.

* Same zero-fill for the interaction counts.
replace totalinteractions=0 if tx==0
tab tx totalinteractions, missing
replace totalinteractions=0 if totalinteractions==.

rename totalinteractions RELPints
rename totalattendance RELPatte
rename firstenrollementdate RELPenro

save "using", replace


*pull in ASF attendance
* Read the "REL AS families" sheet, key it on hhid, and merge it into the working data.
import excel "C:\Users\Kathryn Schwartz\Box\ECDEC - Shared Files\04_Projects\Ahlan Simsim\Lebanon CENTER\6_RCT\Data\Quantitative\REL - wave 1 children and AS families-additionalinformation.xlsx", ///
sheet("REL AS families") firstrow case(lower) clear

* Lower-case and trim the key, as is done for the nationality lookup, so stray
* whitespace in the sheet cannot silently block a match on hhid.
gen hhid=ustrtrim(strlower(childid))
sort hhid
save "ASF_merge_in", replace

use "using", clear
sort hhid
* Use the explicit 1:1 form, as the nationality and classroom merges do. The
* deprecated "merge hhid using" form does not check key uniqueness.
merge 1:1 hhid using "ASF_merge_in"
tab _merge
*merged in on the 502 (501 plus extra not using in our analyses not randomized by us, cousin)
drop _merge

rename dateofenrollment ASFenro
rename ecdstaffincharge ASFfaci

tab ASFfaci assistant
*perfect, exactly the same

* Session counts are set to 0 for families with tx equal to 0 or 1.
tab tx ofprimarysessionsattended, missing
replace ofprimarysessionsattended=0 if tx==0 | tx==1
rename ofprimarysessionsattended ASFprim

tab tx ofsessionsattended, missing
replace ofsessionsattended=0 if tx==0 | tx==1
rename ofsessionsattended ASFtotal

codebook RELPatte RELPints ASFprim ASFtotal


*Prior ECE*
codebook previous_ece
rename previous_ece priorece

save "using", replace


* Indicator for switching teachers midway, and re-pointing the teacher
* variable at the teacher each household had longer.
tabulate oldteacherid teacher
* These line up except 5, 6, 7 (the assigned teacher started late, so those
* households started with someone else).
tabulate newteacherid teacher
* These line up except 17, who left a few weeks into the program.
* It also looks like 15 and 24 households respectively stayed with teacher 2
* instead of going to 5 and 6.

* We want to cluster on the new teacher ID (the longer one for all).
rename teacher assigned_teacher
generate teacher = newteacherid

* 1 when the new and old teacher IDs differ, 0 otherwise.
generate tch_change = (newteacherid != oldteacherid)
*152


list teacher classroom assistant oldteacherid assigned_teacher tch_change if hhid == "mz3644"
* This household is not on the treatment attendance sheets because it opted not
* to enroll even though assigned to RELP, then changed its mind and enrolled in
* summer programming with control families. It is entered as assigned, as in
* its group, and will simply show no attendance.
list hhid teacher classroom assistant oldteacherid assigned_teacher tch_change if classroom == 10
* Manual fill for that household.
replace teacher = 7 if hhid == "mz3644"
replace oldteacherid = 3 if hhid == "mz3644"
replace tch_change = 1 if hhid == "mz3644"
list hhid teacher classroom assistant oldteacherid assigned_teacher tch_change if classroom == 10

save "using", replace



*only keeping what using in these prelim analyses
* One long keep command continued with ///. Text after /// on a line is a
* comment, so the trailing notes label each group of variables: IDs and dates,
* treatment arms, clusters, outcomes, baseline outcomes, covariates, and extras.
* p_env11_bl appears twice in the list.
* The result is saved as "Dec_Models", the input to the imputation section.
keep today* enum* date* _CGsurvBLintoEL_merge _CGintoIDELAendline_merge ///
RESTintoIDELAbaseline_merge ID_bl ID_el CG_bl CG_el ///  /*dates, IDs, etc., not imputing currently*/
hhid tx group_el RELPatte RELPints ASFprim ASFtotal tch_change /// /*treatment arms, not imputing*/
classroom teacher  assistant /// /*clusters, not imputing*/
PHQ_el GAD_el psedis_el psenur_el pseply_el pss_el ecdact_el belpl_el belsel_el ///
IDLIT_el IDNUM_el IDSEL_el IDMOT_el playfreq_el /// /*outcomes*/
PHQ_bl GAD_bl psedis_bl psenur_bl pseply_bl pss_bl ecdact_bl belpl_bl belsel_bl ///
IDLIT_bl IDNUM_bl IDSEL_bl IDMOT_bl playfreq_bl /// /*baseline of outcomes*/
b_educ_bl b_read_n_bl ses_bl persup_bl hh_num_bl hh_ch_u6_bl /// 
b_move_t_bl h_o_gen_bl h_o_ment_bl fc_heal_bl ss_famil_bl ss_frien_bl ///
p_env11_bl /// /*covariates, impute*/
lebanese cg_change governorate fam_type Chfem ///
WashingtonGroupScore washingtongroupscore priorece /// /*covariates, complete*/
prei_mod1_bl prei_mod2_bl hh_ch_12_bl hh_ch_18_bl ///
ss1_bl ss2_bl ss3_bl ss4_bl ss5_bl ss6_bl ss7_bl ss8_bl ///
p_env11_bl p_env12_bl p_env24_bl p_env25_bl p_env26_bl p_env27_bl p_env28_bl /// 
b_educ_s_bl prei_21_bl prei_22_bl prei_26_bl hh_mi_np_bl b_hom_l_bl h_sleep_bl fc_bwy_bl /// 
fc_pl_43_bl fc_pl_48_bl fc_pl_49_bl fc_pl_411_bl ///
fc_pl_416_bl fc_pl_418_bl fc_pl_419_bl fc_pl_51_bl fc_pl_511_bl fc_pl_514_bl fc_pl_519_bl ///
job1_bl job2_bl job3_bl job4_bl job5_bl job6_bl job7_bl job8_bl job9_bl job10_bl job11_bl ///
fc_pl_1_el fc_pl_1_bl fc_pl_21_el fc_pl_22_el fc_pl_23_el fc_pl_21_bl fc_pl_22_bl fc_pl_23_bl ///
chor_el reli_el meal_el chor_bl reli_bl meal_bl ///
p_gha_1j_bl p_gha_1b_bl p_gha_1e_bl p_gha_1f_bl p_gha_1l_bl p_gha_1m_bl /// /*extra baseline, impute*/
b_educ_el b_read_n_el b_hom_l_el b_move_t_el ///
hh_num_el hh_ch_u6_el hh_ch_12_el hh_ch_18_el prei_mod1_el prei_mod2_el h_o_ment_el ///
job1_el job2_el job3_el job4_el job5_el job6_el job7_el job8_el job9_el job10_el job11_el ///
p_gha_1j_el p_gha_1b_el p_gha_1e_el p_gha_1f_el p_gha_1l_el p_gha_1m_el /// /*extra endline, impute*/
who_surv_bl who_surv_el district n_siblings RELPenro ASFenro ///  
assigned_teacher newteacherid oldteacherid /*extra, do not, categorical/complete, just want to have*/
 
save "Dec_Models", replace



**IMPUTATION**

*empty the session (data, matrices, Mata) and then raise the variable limit
clear
clear matrix
clear mata
set maxvar 32000

use "Dec_Models", clear

*two-group indicator: 0 when tx==1, 1 when tx==2, missing for every other tx value
generate ELP_pl_vELP = cond(tx==1, 0, cond(tx==2, 1, .))

*unadjusted two-group t-tests on the four endline ID scores
foreach score in IDLIT_el IDNUM_el IDSEL_el IDMOT_el {
    ttest `score', by(ELP_pl_vELP)
}

*tx is wanted as a predictor in the models for the outcome variables; it is missing
*only for the one person who was added, so tx_dummmy copies tx and falls back on
*that person's later assignment (group_el) where tx is missing
generate tx_dummmy = cond(tx==., group_el, tx)

*endline outcomes
codebook PHQ_el GAD_el psedis_el psenur_el pseply_el pss_el ecdact_el ///
    belpl_el belsel_el IDLIT_el IDNUM_el IDSEL_el IDMOT_el

*baseline measures of the same outcomes
codebook PHQ_bl GAD_bl psedis_bl psenur_bl pseply_bl pss_bl ecdact_bl ///
    belpl_bl belsel_bl IDLIT_bl IDNUM_bl IDSEL_bl IDMOT_bl

*main baseline covariates
codebook b_educ_bl b_read_n_bl ses_bl persup_bl Chfem hh_num_bl hh_ch_u6_bl b_move_t_bl ///
    h_o_gen_bl h_o_ment_bl fc_heal_bl ss_famil_bl ss_frien_bl p_env11_bl lebanese

codebook prei_mod1_bl prei_mod2_bl hh_ch_12_bl hh_ch_18_bl ///
    ss1_bl ss2_bl ss3_bl ss4_bl ss5_bl ss6_bl ss7_bl ss8_bl ///
    p_env12_bl

*variables added since the preliminary analyses (mostly to have): largely baseline
*items, together with the endline fc_pl, chor, reli and meal items
codebook playfreq_bl ///
    p_env24_bl p_env25_bl p_env26_bl p_env27_bl p_env28_bl ///
    b_educ_s_bl prei_21_bl prei_22_bl prei_26_bl hh_mi_np_bl b_hom_l_bl h_sleep_bl fc_bwy_bl ///
    fc_pl_43_bl fc_pl_48_bl fc_pl_49_bl fc_pl_411_bl fc_pl_416_bl fc_pl_418_bl fc_pl_419_bl ///
    fc_pl_51_bl fc_pl_511_bl fc_pl_514_bl fc_pl_519_bl ///
    job1_bl job2_bl job3_bl job4_bl job5_bl job6_bl job7_bl job8_bl job9_bl job10_bl job11_bl ///
    fc_pl_1_el fc_pl_1_bl fc_pl_21_el fc_pl_22_el fc_pl_23_el fc_pl_21_bl fc_pl_22_bl fc_pl_23_bl ///
    chor_el reli_el meal_el chor_bl reli_bl meal_bl ///
    p_gha_1j_bl p_gha_1b_bl p_gha_1e_bl p_gha_1f_bl p_gha_1l_bl p_gha_1m_bl

*convert the baseline job indicators to numeric in place
destring job1_bl job2_bl job3_bl job4_bl job5_bl job6_bl ///
    job7_bl job8_bl job9_bl job10_bl job11_bl, replace

*endline variables added since the preliminary analyses (mostly to have)
codebook playfreq_el b_educ_el b_read_n_el b_hom_l_el b_move_t_el ///
    hh_num_el hh_ch_u6_el hh_ch_12_el hh_ch_18_el prei_mod1_el prei_mod2_el h_o_ment_el ///
    job1_el job2_el job3_el job4_el job5_el job6_el job7_el job8_el job9_el job10_el job11_el ///
    p_gha_1j_el p_gha_1b_el p_gha_1e_el p_gha_1f_el p_gha_1l_el p_gha_1m_el



*cg_change can inform both baseline and endline; the rest of these are baseline
codebook Chfem lebanese cg_change governorate fam_type ///
    WashingtonGroupScore washingtongroupscore priorece n_siblings

*these, like tx status, should only inform endline
codebook RELPatte RELPints ASFprim ASFtotal tch_change

*declare the data as mi data in the wide style
mi set wide

*variables to be registered as imputed, grouped by role; the groups are
*concatenated below in the same order as the original single list
local reg_out_el PHQ_el GAD_el psedis_el psenur_el pseply_el pss_el ecdact_el ///
    belpl_el belsel_el IDLIT_el IDNUM_el IDSEL_el IDMOT_el

local reg_out_bl PHQ_bl GAD_bl psedis_bl psenur_bl pseply_bl pss_bl ecdact_bl ///
    belpl_bl belsel_bl IDLIT_bl IDNUM_bl IDSEL_bl IDMOT_bl

local reg_cov_bl b_educ_bl b_read_n_bl ses_bl persup_bl hh_num_bl hh_ch_u6_bl ///
    b_move_t_bl h_o_gen_bl h_o_ment_bl fc_heal_bl ss_famil_bl ss_frien_bl p_env11_bl ///
    prei_mod1_bl prei_mod2_bl hh_ch_12_bl hh_ch_18_bl ///
    ss1_bl ss2_bl ss3_bl ss4_bl ss5_bl ss6_bl ss7_bl ss8_bl p_env12_bl

local reg_extra_bl playfreq_bl p_env24_bl p_env25_bl p_env26_bl p_env27_bl p_env28_bl ///
    b_educ_s_bl prei_21_bl prei_22_bl prei_26_bl hh_mi_np_bl b_hom_l_bl h_sleep_bl fc_bwy_bl ///
    fc_pl_43_bl fc_pl_48_bl fc_pl_49_bl fc_pl_411_bl fc_pl_416_bl fc_pl_418_bl fc_pl_419_bl ///
    fc_pl_51_bl fc_pl_511_bl fc_pl_514_bl fc_pl_519_bl ///
    job1_bl job2_bl job3_bl job4_bl job5_bl job6_bl job7_bl job8_bl job9_bl job10_bl job11_bl ///
    fc_pl_1_el fc_pl_1_bl fc_pl_21_el fc_pl_22_el fc_pl_23_el fc_pl_21_bl fc_pl_22_bl fc_pl_23_bl ///
    chor_el reli_el meal_el chor_bl reli_bl meal_bl ///
    p_gha_1j_bl p_gha_1b_bl p_gha_1e_bl p_gha_1f_bl p_gha_1l_bl p_gha_1m_bl

local reg_extra_el playfreq_el b_educ_el b_read_n_el b_hom_l_el b_move_t_el ///
    hh_num_el hh_ch_u6_el hh_ch_12_el hh_ch_18_el prei_mod1_el prei_mod2_el h_o_ment_el ///
    job1_el job2_el job3_el job4_el job5_el job6_el job7_el job8_el job9_el job10_el job11_el ///
    p_gha_1j_el p_gha_1b_el p_gha_1e_el p_gha_1f_el p_gha_1l_el p_gha_1m_el n_siblings

mi register imputed `reg_out_el' `reg_out_bl' `reg_cov_bl' `reg_extra_bl' `reg_extra_el'


*predictors removed from the equations of the first logit, pmm and regress groups:
*the endline outcomes, i.tx_dummmy, RELPatte RELPints ASFprim ASFtotal and i.tch_change
local el_only PHQ_el GAD_el psedis_el psenur_el pseply_el pss_el ecdact_el belpl_el belsel_el ///
    IDLIT_el IDNUM_el IDSEL_el IDMOT_el ///
    i.tx_dummmy RELPatte RELPints ASFprim ASFtotal i.tch_change

*chained imputation with six groups of variables: logit, pmm (knn(10)) and regress,
*each once with the omit list above and once without it.
*NOTE(review): fc_pl_1_el, fc_pl_21_el, fc_pl_22_el, fc_pl_23_el, chor_el, reli_el and
*meal_el are endline items but sit in groups that use the omit list - confirm intended.
*100 imputations are added under a fixed seed; noisily shows the model output.
mi impute chained ///
    (logit, omit(`el_only')) ///
        p_env24_bl p_env25_bl p_env26_bl p_env27_bl p_env28_bl prei_26_bl hh_mi_np_bl fc_bwy_bl ///
        job1_bl job2_bl job3_bl job4_bl job5_bl job6_bl job7_bl job8_bl job9_bl job10_bl job11_bl ///
    (logit) ///
        job1_el job2_el job3_el job4_el job5_el job6_el job7_el job8_el job9_el job10_el job11_el ///
    (pmm, knn(10) omit(`el_only')) ///
        h_o_gen_bl h_o_ment_bl fc_heal_bl prei_21_bl prei_22_bl h_sleep_bl ///
        fc_pl_43_bl fc_pl_48_bl fc_pl_49_bl fc_pl_411_bl fc_pl_416_bl fc_pl_418_bl fc_pl_419_bl ///
        fc_pl_51_bl fc_pl_511_bl fc_pl_514_bl fc_pl_519_bl ///
        fc_pl_1_el fc_pl_1_bl fc_pl_21_el fc_pl_22_el fc_pl_23_el fc_pl_21_bl fc_pl_22_bl fc_pl_23_bl ///
        p_gha_1j_bl p_gha_1b_bl p_gha_1e_bl p_gha_1f_bl p_gha_1l_bl p_gha_1m_bl ///
    (pmm, knn(10)) ///
        h_o_ment_el p_gha_1j_el p_gha_1b_el p_gha_1e_el p_gha_1f_el p_gha_1l_el p_gha_1m_el ///
    (regress, omit(`el_only')) ///
        PHQ_bl GAD_bl psedis_bl psenur_bl pseply_bl pss_bl ecdact_bl belpl_bl belsel_bl ///
        IDLIT_bl IDNUM_bl IDSEL_bl IDMOT_bl ///
        b_educ_bl b_read_n_bl ses_bl persup_bl hh_num_bl hh_ch_u6_bl b_move_t_bl ///
        ss_famil_bl ss_frien_bl p_env11_bl prei_mod1_bl prei_mod2_bl hh_ch_12_bl hh_ch_18_bl ///
        ss1_bl ss2_bl ss3_bl ss4_bl ss5_bl ss6_bl ss7_bl ss8_bl p_env12_bl ///
        playfreq_bl b_educ_s_bl b_hom_l_bl ///
        chor_el reli_el meal_el chor_bl reli_bl meal_bl n_siblings ///
    (regress) ///
        PHQ_el GAD_el psedis_el psenur_el pseply_el pss_el ecdact_el belpl_el belsel_el ///
        IDLIT_el IDNUM_el IDSEL_el IDMOT_el ///
        playfreq_el b_educ_el b_read_n_el b_hom_l_el b_move_t_el ///
        hh_num_el hh_ch_u6_el hh_ch_12_el hh_ch_18_el prei_mod1_el prei_mod2_el ///
    = i.tx_dummmy i.Chfem i.lebanese i.cg_change ib6.governorate i.fam_type ///
        WashingtonGroupScore washingtongroupscore i.priorece ///
        RELPatte RELPints ASFprim ASFtotal i.tch_change, ///
    add(100) rseed(94367) noisily

*keep the data together with the added imputations
save "RELP_clean_deidentified", replace




